home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Libris Britannia 4
/
science library(b).zip
/
science library(b)
/
DDJMAG
/
DDJ8608.ZIP
/
LETTER.AUG
next >
Wrap
Text File
|
1986-08-31
|
24KB
|
733 lines
Listing One:
;DeSmet function isqrt(source);
; ...source is a long integer (32 bit)...
;Returns square root of source in ax, using Newton's method; see Scanlon's
;8086 book for similar function (Scanlon's is not sufficiently general, and has
;an error)...
;
;REGISTER USAGE cx:bx ...stores copy of 32-bit source throughout...
; di ...``last''estimate of isqrt(source)...
; si ...current estimate of isqrt(source)...
; dx:ax ...used to divide source by di...
cseg
public isqrt_
isqrt_: push bp
mov bp,sp
mov bx, [bp+4] ;Store a copy of source in cx:bx;
mov cx, [bp+6] ;cx:bx preserved till almostdone...
;-----Start block to determine initial estimate; base estimate on most---------
;-----significant non-zero byte of cx:bx---------------------------------------
cmp ch,0 ;Note 46341 covers largest positive
je test_cl ;long that most compilers will pass.
mov di,46341 ;If you use 32-bit unsigned, replace
jmp load_si ;with 65535, and if cx>=FFFEH, jump
;------------ ;out and set result= 65535.
test_cl: cmp cl,0
je text_bh
mov di,2896
jmp load_si
;------------
test_bh: cmp bh,0
je its_bl
mov di,181
jmp load_si
;------------
its_bl: mov di,8
load_si: mov si,di
;-----End block to determine initial estimate----------------------------------
;-----Begin loop to refine the estimate----------------------------------------
refine: mov dx,cx ;Load dx:ax pair with source in prep
mov ax,bx ;for divide by di=estimate...
div di
;------Block to average quotient and last estimate-------
shr ax,1 ;We can't just add di,ax then
adc di,0 ;shr di,1 because sum of di and ax
shr di,1 ;may exceed 65535...
;-----------
sub si,di ;Obtain difference betw. old (si)
jz almostdone ;and new estimates; if 0, we're
cmp si,1 ;almost done...Else if diff. is
je almostdone ;1 or -1 we're almost done...
cmp si,-1
je almostdone
mov si,di ;Store current value di in si as
;``old'' estimate for next iteration.
jmp refine
;-----End loop to refine the estimate------------------------------------------
almostdone: mov ax,di ;Check to see if estimate*estimate
mul di ;is less than cx:bx; this step is
sub bx,ax ;for fussbudgets who demand final
sbb cx,dx ;integer sqrt be < real sqrt; ditch
jns done ;it and save approx. 60 clocks...
dec di ;If product >cx:bx, subtract 1...
done: mov ax,di ;DeSmet looks for result in ax...
mov sp,bp
pop bp
ret
/* Test driver for isqrt()... */
main()
{
long start,i,NumSqrts;
printf(``ENTER start: '');
scanf(``%D'',&start);
printf(``ENTER NumSqrts: '');
scanf(``%D'',&NumSqrts);
printf(``isqrt(%D)= %u\n'',start,isqrt(start));
putchar('\007');
for(i=start;i<start+NumSqrts;++i)isqrt(i); /* Remove this isqrt() to find */
putchar('\007'); /* time taken by loop itself...*/
}
/* Another short driver; this one better for verifying algorithm */
main()
{
long source;
unsigned result;
printf(``ENTER # for sqrt; negative exits\n'');
while(printf(``ENTER #: ''),scanf(``%D'',&source),source>=OL){
result=isqrt(source);
printf(``result= SQRT(%D)= %u\n'',source,result);
printf(``result*result= %D\n'',((long)result)*((long)result));
printf(``(result+1)*(result+1)= %D\n\n'',(result+1L)*(result+1L));
}
}
-----------------------------------------------------------------------------
Listing Two: Bit-Shifting Method (Slower)
;DeSmet function 1sqrt(); takes a long (32-bit) integer as an argument, returns
;a short (16-bit) integer square root. Function result returned in ax.
;Modified after 68000 code published in DDJ #109, Nov. 1985, p. 90. Comments
;give roughly analogous 68000 instructions; correspondence with 68000 registers
;is:
; D0 = sp:si (initially holds argument)
; D1 = dx:di (Error term)
; D2 = ax (Running estimate)
; D3 = bx (High bracket; may exceed 16 bits on last iteration)
; D4 = cx (Loop counter)
;
;Note sp is used as a general register, so can't push or pop between
;``mov bp,sp'' and ``mov sp,bp''...also, we must disable DOS's timer
;interrupt, because it manipulates the stack every 55 milliseconds...
cseg
public lsqrt_
lsqrt_: push bp
mov bp,sp
;------Now can address stack; 4-byte argument starts at bp+4------------------
cli ;Clear interrupts (lock out)...
mov si,word [bp+4] ;Place argument in sp:si= ``DO''...
mov sp,word [bp+6]
xor di,di ;Zero ``D1'' and ``D2''...
mov dx,di
mov ax,di
mov cx,16 ;Loop counter cx= ``D4''...
sqrt1: shl si,1
rcl sp,1 ;``asl.l #1,D0''...
rcl di,1 ;``roxl.l #1,D1''...
rcl dx,1
shl si,1
rcl sp,1 ;Repeat shift and rotate...
rcl di,1
rcl dx,1
shl ax,1 ;``asl.l #1,D2''...
mov bx,ax ;``move.1 D2,D3''...
shl bx,1 ;``asl.l #1,D3''...
jc is_carry ;Jump out of loop if new ``D3''exceeds
;16 bits (may happen on last
;iteration)...
cmp dx,0
jb sqrt2 ;``cmp.l D3,D1''...
ja past1 ;``bls sqrt2''...
cmp di,bx
jbe sqrt2
past1: inc ax ;``addq.1 #1,D2''...
inc bx ;``addq.1 #1,D3''...
sub di,bx ;``sub.1 D3,D1''...
sbb dx,0
sqrt2: loop sqrt1 ;``dbra D4,sqrt1''...
jmp past3 ;Skip 'is_carry' block if we finished
;loop through 16 iterations (no jc,
;``D3'' stayed <17 bits)...
;------------------------------------------------------------------------------
is_carry; cmp dx,0001H ;If we got here, there was a carry
ja past2 ;for shl bx,1 on last iteration of
jb past3 ;loop; compare upper word ``D1''
cmp di,bx ;against upper word ``D3'', which is
jbe past3 ;now 0001H; then compare lower words
;of ``D1'' and ``D3''...
past2: inc ax ;``addq.1 #1,D2''...
;------------------------------------------------------------------------------
past3: sti ;Restore interrupts...
mov sp,bp ;Restore frame, function result
pop bp ;is already in ax....
ret
*************************************************************************
Listing Three:
* *
* Integer Square Root (32 to 16 bit). *
* *
* (Exact method, not approximate). *
* *
* Call with: *
* DO.L = Unsigned number. *
* *
* Returns: *
* DO.L = SQRT(DO.L) *
* *
* Notes: Result fits in DO.W, but is valid in longword. *
* Takes from 122 to 1272 cycles (including rts). *
* Averages 610 cycles measured over first 65535 roots. *
* Averages 1104 cycles measured over first 500000 roots. *
* *
*************************************************************************
.globl lsqrt
* Cycles
lsqrt tst.l d0 (4) ; Skip doing zero.
beg.s done (10/8)
cmp.l #$10000,d0 (14) ; If is a longword, use the long routine.
bhs.s glsqrt (10/8)
cmp.w #625,d0 (8) ; Would the short word routine be quicker?
bhi.s gsqrt (10/8) ; No, use general purpose word routine.
* ; Otherwise fall into special routine.
*
* For speed, we use three exit points.
* This is cheesy, but this is a speed-optimized subroutine!
*************************************************************************
* *
* Faster Integer Square Root (16 to 8 bit). For small arguments. *
* *
* (Exact method, not approximate). *
* *
* Call with: *
* DO.W = Unsigned number. *
* *
* Returns: *
* DO.W = SQRT(DO.W) *
* *
* Notes: Result fits in DO.B, but is valid in word. *
* Takes from 72 (d0=1) to 504 (d0=625) cycles *
* (including rts). *
* *
* Algorithm supplied by Motorola. *
* *
*************************************************************************
* Use the theorem that a perfect square is the sum of the first
* sqrt(arg) number of odd integers.
* Cycles
move.w d1,-(sp) (8)
move.w #-1,d1 (8)
qsqrt1 addq.w #2,d1 (4)
sub.w d1,d0 (4)
bpl qsqrt1 (10/8)
asr.w #1,d1 (8)
move.w d1,d0 (4)
move.w (sp)+,d1 (12)
done rts (16)
*************************************************************************
* *
* Integer Square Root (16 to 8 bit). *
* *
* (Exact method, not approximate). *
* *
* Call with: *
* DO.W = Unsigned number. *
* *
* Returns: *
* DO.L = SQRT(DO.W) *
* *
* Uses: D1-D4 as temporaries -- *
* D1 = Error term; *
* D2 = Running estimate; *
* D3 = High bracket; *
* D4 = Loop counter *
* *
* Notes: Result fits in DO.B, but is valid in word. *
* *
* Takes from 512 to 592 cycles (including rts). *
* *
* Instruction times for branch-type instructions *
* listed as (X/Y) are for (taken/not taken). *
* *
*************************************************************************
* Cycles
gsqrt movem.w d1-d4,-(sp) (24)
move.w #7,d4 (8) ; Loop count (bits-1 of result).
clr.w d1 (4) ; Error term in D1.
clr.w d2 (4)
sqrt1 add.w d0,d0 (4) ; Get 2 leading bits a time and add
addx.w d1,d1 (4) ; into Error term for interpolation.
add.w d0,d0 (4) ; (Classical method, easy in binary).
addx.w d1,d1 (4)
add.w d2,d2 (4) ; Running estimate * 2.
move.w d2,d3 (4)
add.w d3,d3 (4)
cmp.w d3,d1 (4)
bls.s sqrt2 (10/8) ; New Error term > 2* Running estimate?
addq.w #1,d2 (4) ; Yes, we want a `1' bit then.
addq.w #1,d3 (4) ; Fix up new Error term.
sub.w d3,d1 (4)
sqrt2 dbra d4,sqrt1 (10/14) ; Do all 8 bit-pairs.
move.w d2,d0 (4)
movem.w (sp)+,d1-d4 (28)
rts (16)
*************************************************************************
* *
* Integer Square Root (32 to 16 bit). *
* *
* (Exact Method, not approximate). *
* *
* Call with: *
* DO.L = Unsigned number. *
* *
* Returns: *
* DO.L = SQRT(DO.L) *
* *
* Uses: D1-D4 as temporaries -- *
* D1 = Error term; *
* D2 = Running estimate; *
* D3 = High bracket; *
* D4 = Loop counter. *
* *
* Notes: Result fits in DO.W, but is valid in longword. *
* *
* Takes from 1080 to 1236 cycles (including rts.) *
* *
* Two of the 16 passes are unrolled from the loop so that *
* quicker instructions may be used where there is no *
* danger of overflow (in the early passes). *
* *
* Instruction times for branch-type instructions *
* listed as (X/Y) are for (taken/not taken). *
* *
*************************************************************************
* Cycles
glsqrt movem.l d1-d4,-(sp) (40)
moveq #13,d4 (4) ; Loop count (bits-1 of result).
moveq #0,d1 (4) ; Error term in D1.
moveq #0,d2 (4)
lsqrt1 add.l d0,d0 (8) ; Get 2 leading bits a time and add
addx.w d1,d1 (4) ; into Error term for interpolation.
add.l d0,d0 (8) ; (Classical method, easy in binary).
addx.w d1,d1 (4)
add.w d2,d2 (4) ; Running estimate * 2.
move.w d2,d3 (4)
add.w d3,d3 (4)
cmp.w d3,d1 (4)
bls.s lsqrt2 (10/8) ; New Error term > 2* Running estimate?
addq.w #1,d2 (4) ; Yes, we want a `1' bit then.
addq.w #1,d3 (4) ; Fix up new Error term.
sub.w d3,d1 (4)
lsqrt2 dbra d4,lsqrt1 (10/14) ; Do first 14 bit-pairs.
add.l d0,d0 (8) ; Do 15-th bit-pair.
addx.w d1,d1 (4)
add.l d0,d0 (8)
addx.l d1,d1 (8)
add.w d2,d2 (4)
move.l d2,d3 (4)
add.w d3,d3 (4)
cmp.l d3,d1 (6)
bls.s lsqrt3 (10/8)
addq.w #1,d2 (4)
addq.w #1,d3 (4)
sub.l d3,d1 (8)
lsqrt3 add.l d0,d0 (8) ; Do 16-th bit-pair.
addx.l d1,d1 (8)
add.l d0,d0 (8)
addx.l d1,d1 (8)
add.w d2,d2 (4)
move.l d2,d3 (4)
add.l d3,d3 (8)
cmp.l d3,d1 (6)
bls.s lsqrt4 (10/8)
addq.w #1,d2 (4)
lsqrt4 move.w d2,d0 (4)
movem.l (sp)+,d1-d4 (44)
rts (16)
end
*************************************************************************
Listing Four:
* *
* Integer Square Root (32 to 16 bit). *
* *
* (Newton-Raphson method). *
* *
* Call with: *
* DO.L = Unsigned number. *
* *
* Returns: *
* DO.L = SQRT(DO.L) *
* *
* Notes: Result fits in DO.W, but is valid in longword. *
* Takes from 338 cycles (1 shift, 1 division) to *
* 1580 cycles (16 shifts, 4 divisions) (including rts). *
* Averages 854 cycles measured over first 65535 roots. *
* Averages 992 cycles measured over first 500000 roots. *
* *
*************************************************************************
.globl lsqrt
* Cycles
lsqrt movem.l d1-d2,-(sp) (24)
move.l d0,d1 (4) ; Set up for guessing algorithm.
beq.s return (10/8) ; Don't process zero.
moveq #1,d2 (4)
guess cmp.l d2,d1 (6) ; Get a guess that is guaranteed to be
bls.s newton (10/8) ; too high, but not by much, by dividing the
add.l d2,d2 (8) ; argument by two and multiplying a 1 by 2
lsr,l #1,d1 (10) ; until the power of two passes the modified
bra.s guess (10) ; argument, then average these two numbers.
newton add.l d1,d2 (8) ; Average the two guesses.
lsr.l #1,d2 (10)
move.l d0,d1 (4) ; Generate the next approximation(s)
divu d2,d1 (140) ; via the Newton-Raphson method.
bvs.s done (10/8) ; Handle out-of-range input (cheats!)
cmp.w d1,d2 (4) ; Have we converged?
bls.s done (10/8)
swap d1 (4) ; No, kill the remainder so the
clr.w d1 (4) ; next average comes out right.
swap d1 (4)
bra.s newton (10)
done clr.w d0 (4) ; Return a word answer in longword.
swap d0 (4)
move.w d2,d0 (4)
return movem.l (sp)+,d1-d2 (28)
rts (16)
end
**************************************************************************
Listing Five:
Program Squareroot;
{
Squareroot algorithm & testprogram; DDJ March 1986, p.122
Features: - sqrt routine in 68000 machine language;
- long integer loopcount;
}
const { Iteration count for test loop }
NNR = 6E4; { real, for printing of statistics }
NNS = '60000'; { string, for assignment to long integer }
type long = record
low,high : integer;
end;
var finished : boolean; { flag for loop }
number, limit : long; { loop count, loop limit }
sqrrt, { square root result }
sqrrto, { last displayed square root }
t1,t2 : integer; { parameters for system time }
time1, time2 : real; { start, end time }
{ --- ROUTINES FOR LONG (32-BIT) INTEGER SUPPORT --- }
procedure lg_clr(var l:long); external;
{ Clears long integer l }
procedure lg_asn_DU(s:string; var l:long); external;
{ Assigns the unsigned decimal string s to the long integer l }
procedure lg_inc1(var l:long); external;
{ Increments long integer l by 1 }
procedure lg_grt(a,b:long; var flag:boolean); external;
{ Compares long integers a and b and assign result to flag }
{ --- SQUAREROOT ROUTINE --- }
procedure sqrt(number:long; var result:integer); external;
{ Calculates square root of 'number' and returns it in 'result' }
begin { main }
sqrrto := 100;
lg_clr(number); { number := 0 }
lg_asn_DU(NNS,limit); {limit := NNS }
finished := false;
write('Start...');
time(t1,t2); time1 := t2; { get start time }
while not finished do { calculate sqrt(number) }
begin
sqrt(number,sqrrt);
{
if sqrrt <> sqrrto
then begin
write ('Number = ',number.high:6,' | ',number.low:6);
writeln(' --- Sqrt = ',sqrrt:4);
sqrrto := sqrrt;
end;
}
lg_inc1(number); { number := number + 1 }
lg_grt(number,limit,finished); { finished := (number > limit) }
end;
time(t1,t2); time2 := t2; { get end time }
writeln('finished !'); writeln;
writeln('Time: ',(time2-time1)/60,' seconds');
end.
*******************************************************************************
Listing Six:
1 *
2 * Squareroot algorithm; DDJ March 1986, p.122
3 *
4 * 68000 assembly language version
5 *
6 * Features: - equivalent to compiler-generated code;
7 *
8 *
9 * procedure sqrt(number:long; var result:integer);
10 *
11 * Calculates integer square root of 'number' and returns it in 'result';
12 *
13 *
14 * Register usage:
15 * --------------
16 *
17 * D0 : word register A0 : parameter stack pointer
18 * D1 : number A1 : scratch register
19 * D2 : guess1
20 * D3 : guess2
21 * D4 : error
22 *
23 proc sqrt,2 ;2 words of parameters
24 *
25 * Get parameters from stack
26 *
27 move.l (sp)+,a0 ;get return address
28 move.w 2(sp),a1 ;get ^number
29 move.l (a1),d1 ;get number
30 *
31 * bra Q15 ;--- for timing only
32 *
33 beq Q8 ;if number=0, skip
34 *
35 * Set initial values
36 *
37 Q7 moveq #1,d2 ;guess1 := 1
38 move.1 d1,d3 ;guess2 := number
39 *
40 * Do shifts until guess1 ~~~ guess2
41 *
42 Q9 move.l d2,d0 ;guess1 to work register
43 asl.l #1,d0 ;guess1 * 2
44 cmp.l d3,d0 ;compare with guess2
45 bge Q11 ;branch if guess1 * 2 >= guess2
46 asl.l #1,d2 ;guess1 := guess1 * 2
47 asr.l #1,d3 ;guess2 := guess2 / 2
48 bra Q9
49 *
50 * Now do divisions
51 *
52 Q11
53 Q13 add.l d3,d2 ;guess := guess1 + guess2
54 asr.l #1,d2 ;guess1 := (guess1+guess2)/2
55 move.l d1,d0 ;number to work register
56 divs d2,d0 ;number / guess1
57 move.w d0,d3 ;guess2 := number/guess1
58 ext.l d3 ;extend to 32 bits
59 move.l d2,d0 ;guess1 to work register
60 sub.l d3,d0 ;guess1-guess2
61 move.l d0,d4 ;error := guess1-guess2
62 ble Q14 ;if error <= 0
63 bra Q13 ;loop back if error > 0
64 Q14 move.l d2,d0 ;result := guess1
65 bra Q15
66 Q8 moveq #0,d0 ;result := 0
67 *
68 * Set result & return to caller
69 *
70 Q15 movea.w (sp)+,a1 ;get ^result
71 move.w d0,(a1) ;store result
72 *
73 addq.l #2,sp ;drop ^number
74 jmp (a0) ;return to caller
75 *
76 .nolist
******************************************************************************
Listing Seven:
1 *
2 * Squareroot algorithm; DDJ March 1986, p.122
3 *
4 * 68000 assembly language version
5 *
6 * Features: - hand-optimized machine code;
7 *
8 *
9 * procedure sqrt(number:integer; var result:integer);
10 *
11 * Calculates square root of 'number' and returns it in 'result';
12 *
13 *
14 * Register usage:
15 * --------------
16 *
17 * D0 : work register, error A0 : ^result
18 * D1 : number A1 : ^number
19 * D2 : guess1,result
20 * D3 : guess2
21 * D4 : temporary storage for return address
22 *
23 *
24 .proc sqrt,2 ;2 words of parameters
25 *
26 * Get parameters from stack
27 *
28 moveq #0,d2 ;result := 0 --- remove for timing
29 *
30 move.l (sp)+,d4 ;get return address
31 move.w (sp)+,a0 ;get ^result
32 move.w (sp)+,a1 ;get ^number
33 move.l (a1),d1 ;get number
34 *
35 * bra Q15 ;--- for timing only
36 *
37 beq Q15 ;if number=0, then result=0
38 *
39 * Set initial values
40 *
41 Q7 moveq #1,d2 ;guess1 := 1
42 move.l d1,d3 ;guess2 := number
43 *
44 * Do shifts until guess1 ~ guess2
45 *
46 Q9 asl.l #1,d2 ;guess1 := guess1 * 2
47 cmp.l d3,d2 ;compare with guess2
48 bge Q11 ;branch if guess1 * 2 >= guess2
49 asr.l #1,d3 ;guess2 := guess2 / 2
50 bra Q9
51 *
52 Q11 asr.l #1,d2 ;adjust guess1
53 *
54 * Now do divisions
55 *
56 Q13 add.l d3,d2 ;guess1 := guess1 + guess2
57 asr.l #1,d2 ;guess1 := (guess1+guess2)/2
58 move.l d1,d3 ;guess2 := number
59 divs d2,d3 ;guess2 := number / guess1
60 ext.l d3 ;extend guess2 to 32 bits
61 move.l d2,d0 ;guess1 to work register
62 sub.l d3,d0 ;error := guess1 - guess2
63 bgt Q13 ;loop back if error > 0
64 *
65 * Store result and return to caller
66 *
67 Q15 move.w d2,(a0) ;store result
68 *
69 movea.l d4,a0 ;move return address to adr-reg
70 jmp (a0) ;return to caller
71 *
72 .nolist
***************************************************************************
Listing Eight:
1 *
2 * Squareroot algorithm; DDJ November 1985, p.88
3 *
4 * 68000 assembly language
5 *
6 * procedure sqrt(number:long; var result:integer);
7 *
8 * Calculates the integer squareroot of 'number' and returns it in 'result'
9 *
10 *
11 * Register usage
12 * --------------
13 *
14 * D0 : number A0 : scratch, for pointers
15 * D1 : error term A1 : scratch, for pointers
16 * D2 : estimate
17 * D3 : corrector term
18 * D4 : loop counter
19 *
20 *
21 .proc sqrt,2 ;2 words of parameters
22 *
23 move.w 6(sp),a0 ;get ^number
24 move.l (a0),d0 ;get number into d0
25 *
26 * bra exit ;--- for timing only
27 *
28 moveq #16-1,d4 ;set loopcount,16*2 = 32 bits
29 moveq #0,d1 ;clear error term
30 moveq #0,d2 ;clear estimate
31 *
32 * Calculate squareroot
33 *
34 sqrtl asl.l #1,d0 ;get 2 leading bits,
35 roxl.l #1,d1 ;one at a time, into
36 asl.l #1,d0 ;the error term
37 roxl.l #1,d1
38 asl.l #1,d2 ;estimate * 2
39 move.l d2,d3
40 asl.l #1,d3 ;corrector = 2 * new estimate
41 cmp.l d3,d1
42 bls sqrt2 ;branch if error term <= corrector
43 addq.l #1,d2 ;otherwise, add low bit to estimate
44 addq.l #1,d3
45 sub.l d3,d1 ;and calculate new error term
46 *
47 sqrt2 dbra d4,sqrtl ;do all 16 bit-pairs
48 *
49 * Set result & return to caller
50 *
51 exit move.l (sp)+,a0 ;get return address
52 move.w (sp)+,a1 ;get ^result
53 move.w d2,(a1) ;store integer result
54 addq.l #2,sp ;drop ^number
55 jmp (a0) ;return to caller
56 *
57 .nolist